#############################################################################
# FigureC3.R
# Aim: to replicate Figure C3 in Atsusaka and Stevenson (2021)
# Run time: about 7 seconds
#############################################################################

rm(list=ls())
start <- Sys.time()
library(tidyverse)

###############################################################################################    
# (1) WRITING A FUNCTION
###############################################################################################   

cmreg <- function(formula, p, p2, data, init){
  
  i.logit <- function(XB){ exp(XB)/(1 + exp(XB))}
  
  df <- model.frame(formula, data, na.action = na.omit)
  X0 <- model.matrix.default(formula, df) # Data matrix including A
  X1 <- X0[, -dim(X0)[2]]                 # Matrix with 1 and predictors
  A <- df[, dim(X0)[2]]
  Y <- df[,1]
  p <- p
  p2 <- p2
  k <- dim(X1)[2]      # Number of beta parameters
  k.t <- k + 1         # Start of theta parameters
  k.t.e <- 2*k         # End of theta parameters

  init <- init         # Initial values for optim
  
  # LOG-LIKELIHOOD FUNCTION
  log.L <- function(par) {
    sum(   Y*log(            ((2*p-1)*(i.logit(X1 %*% par[1:k])) + (0.5-p) )*i.logit(X1 %*% par[k.t:k.t.e]) + 0.5)
           + (1-Y)*log( 1 - (((2*p-1)*(i.logit(X1 %*% par[1:k])) + (0.5-p) )*i.logit(X1 %*% par[k.t:k.t.e]) + 0.5))
           + A*log(        (0.5-p2)*i.logit(X1 %*% par[k.t:k.t.e]) + 0.5 )
           + (1-A)*log(1- ((0.5-p2)*i.logit(X1 %*% par[k.t:k.t.e]) + 0.5 ))
    )
  }
  
  
  # MAXIMIZATION
  MLE = optim(par=init,                   # initial values for beta and theta
              fn = log.L,                    # function to maximize
              method = "BFGS",               # this method lets set lower bounds (Modified Newton method)
              control = list(maxit=800, fnscale = -1),  # maximize the function
              hessian = TRUE)                # calculate Hessian matricce because we will need for confidence intervals
  
  H = MLE$hessian                            # Hessian matrix
  Var.hat = diag(-solve(H))                  # Variance as the negative inverse of the Hessian matrix (Dropping covariances)
  SE = sqrt(Var.hat)                         # Standard errors
  
  
  Mlist <- list()
  Mlist[[1]] <- MLE$par
  Mlist[[2]] <- SE
  
  return(Mlist)
  
  # Next: make pretty summary
}


#############################################################################
# (2) DATA GENERATING PROCESS
#############################################################################
i.logit <- function(XB){ exp(XB)/(1 + exp(XB))}
set.seed(1234)

Nvec = c(250, 500, 1000, 2000, 3000, 4000, 5000)                                             # Sample size
beta0  = NA
beta1  = NA
beta2  = NA
theta0 = NA
theta1 = NA
theta2 = NA
beta0.sd  = NA
beta1.sd  = NA
beta2.sd  = NA
theta0.sd = NA
theta1.sd = NA
theta2.sd = NA

for(i in 1:length(Nvec)){
N = Nvec[i]
beta  = c(-1.5, 0.5, 0.02)                           # Unknown parameter
theta = c(2, -0.1, -0.01)                            # Unknown parameter
female = rbinom(n=N, size=1, prob=0.5)               # 50% Male, 50% Female
age = rpois(n=N, lambda=30)                          # Age with mean 30
x = as.matrix(data.frame(rep(1, N), female, age))

XB =  x %*% beta                                     # Linear aggregator with betas
XT =  x %*% theta                                    # Linear aggregator with thetas
pi.x    = i.logit(XB)                                # Pi|Male
gamma.x = i.logit(XT)                                # Gamma|Male

p = 0.1                                              # Randomization probability
p2 = 0.15                                            # Randomization probability for the anchor question
Attentive = rbinom(n=N, size=1, prob=gamma.x)        # Attentive R or not

# glm(pi.x~x, family=binomial)                       # CHECK
# glm(gamma.x~x, family=binomial)                    # CHECK


# SENSITIVE QUESTION OF INTERST
StatementA = rbinom(n=N, size=1, prob=pi.x)          # Sesitive item
StatementB = rbinom(n=N, size=1, prob=p)             # Randomization item
True.res = ifelse(StatementA != StatementB, 0, 1)    # Survey answer 

Obs.res = rep(NA, N)
Obs.res[Attentive==1] = True.res[Attentive==1]       # Observed answer for attentive Rs
Obs.res[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                               size=1, prob=0.5)     # Observed answer, otherwise


# NON-SENSITIVE ANCHOR QUESTION
StatementC = 0                                       # Attention "c"heck item (True prevalence is 0 for everyone)
StatementD = rbinom(n=N, size=1, prob=p2)            # Anchor question
True.res.prime = ifelse(StatementC != StatementD, 0, 1) # Survey answer in Anchor Question

Obs.res.prime = rep(NA, N)
Obs.res.prime[Attentive==1] = True.res.prime[Attentive==1] # Observed answer for attentive Rs
Obs.res.prime[Attentive==0] = rbinom(n=length(N[Attentive==0]),
                                     size=1, prob=0.5)# Observed answer, otherwise

Y = Obs.res
A = Obs.res.prime

dat <- cbind(Y, female, age, A, p, p2)                          # Observed data
dat <- as_tibble(dat)
head(dat) 

par(mfrow=c(1,2))
hist(pi.x, breaks=20)
hist(gamma.x, breaks=20)


#############################################################################
# (3) RESULTS
#############################################################################

result <- cmreg(Y~female+age+A, p=0.1, p2=0.15,
                init=rep(0.02,6), data=dat)
result


beta0[i]  = result[[1]][1]
beta1[i]  = result[[1]][2]
beta2[i]  = result[[1]][3]
theta0[i] = result[[1]][4]
theta1[i] = result[[1]][5]
theta2[i] = result[[1]][6]
beta0.sd[i]  = result[[2]][1]
beta1.sd[i]  = result[[2]][2]
beta2.sd[i]  = result[[2]][3]
theta0.sd[i] = result[[2]][4]
theta1.sd[i] = result[[2]][5]
theta2.sd[i] = result[[2]][6]
}


pdf("FigureC3.pdf", width=7, height=4.5)
par(oma=c(3,0,0,0), mfrow=c(2,3), mar=c(2,2,4,2)) # b,l,t,r
cole = "black"

plot(Nvec, beta0, type="n", xlab="",ylab="",main="",ylim=c(-6,2),las=1)
abline(h=beta[1], lty=2, col="firebrick")
arrows(y0=beta0-1.96*beta0.sd, y1=beta0+1.96*beta0.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, beta0, pch=16, cex=1.5)
title(expression(beta[0]), line=1, cex.main=2)

plot(Nvec, beta1, type="n", xlab="",ylab="",main="",ylim=c(-6,2),las=1)
abline(h=beta[2], lty=2, col="firebrick")
arrows(y0=beta1-1.96*beta1.sd, y1=beta1+1.96*beta1.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, beta1, pch=16, cex=1.5)
title(expression(beta[1]), line=1, cex.main=2)

plot(Nvec, beta2, type="n", xlab="",ylab="",main="",ylim=c(-6,2),las=1)
abline(h=beta[3], lty=2, col="firebrick")
arrows(y0=beta2-1.96*beta2.sd, y1=beta2+1.96*beta2.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, beta2, pch=16, cex=1.5)
title(expression(beta[2]), line=1, cex.main=2)


plot(Nvec, theta0, type="n", xlab="",ylab="",main="",ylim=c(-2,4),las=1)
abline(h=theta[1], lty=2, col="firebrick")
arrows(y0=theta0-1.96*theta0.sd, y1=theta0+1.96*theta0.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, theta0, pch=16, cex=1.5)
title(expression(theta[0]), line=1, cex.main=2)


plot(Nvec, theta1, type="n", xlab="",ylab="",main="",ylim=c(-6,2),las=1)
abline(h=theta[2], lty=2, col="firebrick")
arrows(y0=theta1-1.96*theta1.sd, y1=theta1+1.96*theta1.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, theta1, pch=16, cex=1.5)
title(expression(theta[1]), line=1, cex.main=2)


plot(Nvec, theta2, type="n", xlab="",ylab="",main=,ylim=c(-6,2),las=1)
abline(h=theta[3], lty=2, col="firebrick")
arrows(y0=theta2-1.96*theta2.sd, y1=theta2+1.96*theta2.sd,
       x0=Nvec, x1=Nvec, length=0, angle=0, col=cole, lwd=2)
points(Nvec, theta2, pch=16, cex=1.5)
title(expression(theta[2]), line=1, cex.main=2)

mtext(text="Sample Size",side=1,line=1,outer=TRUE)


dev.off()

end <- Sys.time()
start - end

#############################################################################
# END OF THIS R SOURCE FILE
#############################################################################